**********************************************************************************************************;
** PROGRAM DESCRIPTION: DESCRIPTIVE STATISTICS PAPER FINTECH											**;
** INPUT DATASET: 	FINTECH_DATA_RCFS																	**;
** BANK FINANCIAL TECNOLOGY VARIABLES CANNOT BE DISSEMINATED AND HAVE BEEN REPLACED BY PSEUDO-DATA.		**;
** This program reproduces all descriptive analyses (i.e. table 1 in Appendix B and figures 4 to 10).	**;
** Results in table 1 in Appendix B are replicable for all variables but those randomly generated.		**;
** Results for figures 4,6,7,8,9,and 10 can be replicated while those for figure 5 are not replicable.	**;
** For a description of the variables see the document "data and programs description"					**;
**********************************************************************************************************;

* data preparation;

DATA ALL; SET FINTECH_DATA_RCFS;

ETA51=(eta5=1);
ETA52=(eta5=2);
ETA53=(eta5=3);
ETA54=(eta5=4);
ETA55=(eta5=5);


QUALP31=(QUALP3=1);
QUALP32=(QUALP3=2);
QUALP33=(QUALP3=3);


AREA51=(AREA5=1);
AREA52=(AREA5=2);
AREA53=(AREA5=3);
AREA54=(AREA5=4);
AREA55=(AREA5=5);


CLYEQ21=(CLYEQ2=1);
CLYEQ22=(CLYEQ2=2);
CLYEQ23=(CLYEQ2=3);
CLYEQ24=(CLYEQ2=4);
CLYEQ25=(CLYEQ2=5);


STUDIO51=(STUDIO5=1);
STUDIO52=(STUDIO5=2);
STUDIO53=(STUDIO5=3);
STUDIO54=(STUDIO5=4);
STUDIO55=(STUDIO5=5);

 
TIPOFAM1=(TIPOFAM=1);
TIPOFAM2=(TIPOFAM=2);
TIPOFAM3=(TIPOFAM=3);
TIPOFAM4=(TIPOFAM=4);
TIPOFAM5=(TIPOFAM=5);
TIPOFAM6=(TIPOFAM=6);

ACOM4C1=(ACOM4C=1);
ACOM4C2=(ACOM4C=2);
ACOM4C3=(ACOM4C=3);
ACOM4C4=(ACOM4C=4);

RISFIN1=(RISFIN=1);
RISFIN2=(RISFIN=2);
RISFIN3=(RISFIN=3);
RISFIN4=(RISFIN=4);

FEMALE=(sesso=2);
MALE=(sesso=1);
TIPOFIN=.;
IF HHFINTOT=1 THEN TIPOFIN=1;
IF LLFINTOT=1 THEN TIPOFIN=2;

run;


PROC FORMAT; 
 VALUE TIPOFINf 
 1 ="SOPHISTICATED" 
 2 ="UNSOPHISTICATED" 
 . ="MIDDLE SOPHISTICATED" ;
;
proc format;
 value iregf
1	="Piedmont"
2	="Aosta Valley"
3	="Lombardy"
4	="Trentino-Alto-Adige"
5	="Veneto"
6	="Friuli-Venezia-Giulia"
7	="Liguria"
8	="Emilia Romagna"
9	="Tuscany"
10	="Umbria"
11	="Marche"
12	="Lazio"
13	="Abruzzo"
14	="Molise"
15	="Campania"
16	="Apulia"
17	="Basilicata"
18	="Calabria"
19	="Sicily"
20	="Sardinia";
run;
data all2; set all;

label
ETA51 			= "30 and under"
ETA52 			= "31-40"
ETA53 			= "41-50"
ETA54 			= "51-65"
ETA55 			= "over 65"

TIPOFAM1 			= "Single person over 65"
TIPOFAM2 			= "Single person under 65"
TIPOFAM3 			= "Couple without children"
TIPOFAM4 			= "Couple with children"
TIPOFAM5 			= "Single parent with children"
TIPOFAM6 			= "Other household type"

COLDIS 			= "Remote connection to bank"
ROR 			= "Rate of financial returns"
QHRISK 			= "Share of risky assets"

QUALP31 			= "Employee"
QUALP32 			= "Self-employed"
QUALP33 			= "Not employed"

ACOM4C1 			= "up to 20.000 inhabitants"
ACOM4C2 			= "from 20.000 to 40.000 inhabitants"
ACOM4C3 			= "from 40.000 to 500.000 inhabitants"
ACOM4C4 			= "over 500.000 inhabitants"


STUDIO51 			= "no schooling"
STUDIO52 			= "primary school certificate"
STUDIO53 			= "lower secondary school certificate"
STUDIO54 			= "upper secondary school diploma"
STUDIO55 			= "university degree"


CLYEQ21 			= "first quintile"
CLYEQ22 			= "second quintile"
CLYEQ23 			= "third quintile"
CLYEQ24 			= "fourth quintile"
CLYEQ25 			= "fifth quintile"

RISFIN1 			= "no risk aversion"
RISFIN2 			= "low risk aversion"
RISFIN3 			= "medium risk aversion"
RISFIN4 			= "high risk aversion";


format
tipofin TIPOFINf.
IREG iregf.;
QHRISKPER=QHRISK*100;
 RUN;

TITLE "  ";

** TABLE 1 APPENDIX B: Results in table 1 in Appendix B are replicable for all variables except those randomly generated.		**;

PROC TABULATE DATA = ALL2 MISSING NOSEPS ; 
  CLASS TIPOFIN ; VAR FINTOTHAT HDIGF DIGFHATREG  HHFINTOT QHRISK ROR    RISFIN1 RISFIN2 RISFIN3 RISFIN4 TIPOFAM1 TIPOFAM2 TIPOFAM3 TIPOFAM4 TIPOFAM5 TIPOFAM6
ETA51 ETA52 ETA53 ETA54 ETA55  CLYEQ21 CLYEQ22 CLYEQ23 CLYEQ24 CLYEQ25 ACOM4C1 ACOM4C2 ACOM4C3 ACOM4C4 MALE FEMALE COLDIS STUDIO51 STUDIO52 STUDIO53 STUDIO54 STUDIO55 
QUALP31 QUALP32 QUALP33 STRAN; 
  TABLE (FINTOTHAT HDIGF DIGFHATREG COLDIS QHRISK ROR  CLYEQ21 CLYEQ22 CLYEQ23 CLYEQ24 CLYEQ25  RISFIN1 RISFIN2 RISFIN3 RISFIN4 TIPOFAM1 TIPOFAM2 TIPOFAM3 TIPOFAM4 TIPOFAM5 TIPOFAM6
ETA51 ETA52 ETA53 ETA54 ETA55 ACOM4C1 ACOM4C2 ACOM4C3 ACOM4C4 MALE FEMALE STRAN STUDIO51 STUDIO52 STUDIO53 STUDIO54 STUDIO55 
QUALP31 QUALP32 QUALP33),(TIPOFIN ALL)*(mean STD)/NOCELLMERGE ;
 TITLE " APPENDIX B - TABLE 1"; 
  WEIGHT PESO;
RUN;

** FIGURES 4 TO 10: Results for figures 4,6,7,8,9,and 10 can be replicated while those for figure 5 are not replicable.	**;

** FIGURE 4;

PROC TABULATE DATA = ALL2 MISSING NOSEPS ; where anno IN (2004,2012,2020);
  CLASS anno IREG; VAR COLDIS; 
     FORMAT IREG iregf. ;   
  TABLE (IREG),(ANNO)*(COLDIS)*mean;
 TITLE " FIGURE 4. Remote banking connections BY YEAR AND REGION"; 
  WEIGHT PESO;
RUN;

** FIGURE 5: THESE RESULTS CANNOT BE REPLICATED AS DIGFHAT HAS BEEN REPLACED WITH PSEUDO-DATA;;

PROC TABULATE DATA = ALL2 MISSING NOSEPS ; where anno IN (2004,2012,2020);
  CLASS anno IREG; VAR DIGFHAT; 
     FORMAT IREG iregf. ;   
  TABLE (IREG),(ANNO)*(DIGFHAT)*mean;
 TITLE " FIGURE 5. Digital financial services to households BY YEAR AND REGION"; 
  WEIGHT PESO;
RUN;

** FIGURE 6;

PROC TABULATE DATA = ALL2 MISSING NOSEPS ; *where anno=2004;
  CLASS anno HHFINTOT LLFINTOT; VAR ror; 
  TABLE (anno),(HHFINTOT LLFINTOT ALL)*(ror)*   
mean;
 TITLE " FIGURE 6. AVARAGE RATE OF FINANCIAL RETURNS FOR SOPHISTICATED AND UNSOPHISTICATED"; 
  WEIGHT PESO;
RUN;


** FIGURE 7;
PROC TABULATE DATA = ALL2 MISSING NOSEPS ; *where anno=2004;
  CLASS anno HHFINTOT LLFINTOT ; VAR QHRISKPER ; 
  TABLE (anno),(HHFINTOT LLFINTOT ALL)*(QHRISKPER)*mean/NOCELLMERGE ;
 TITLE " FIGURE 7. AVARAGE SHARE OF RISKY ASSETS FOR SOPHISTICATED AND UNSOPHISTICATED"; 
  WEIGHT PESO;
RUN;


** FIGURE 8;
PROC TABULATE DATA = all2 S=[cellwidth=150];
title "FIGURE 8. Portfolio composition across financial literacy quartiles (2004-2020)"; 
CLASS CLFL2 ;
VAR AF1 AF2 AF3 AF_L/S=[ cellwidth=150 textindent=0];                                
TABLE CLFL2,   
 (AF1 AF2 AF3 AF_L)*PCTSUM< AF_L >=" (share on total wealth)"*F=COMMA6.1  
/ NOCELLMERGE rts=19 BOX='finlit quartile (*)';
WEIGHT PESO;  
where anno GE 2004;
RUN;


** FIGURE 9;
PROC TABULATE DATA = ALL2 S=[cellwidth=150];
CLASS anno ;
VAR pazQ;                                
TABLE anno,    
pazQ*MEAN*F=   COMMA8.0*F=COMMA8.3
/ NOCELLMERGE rts=19 BOX='year (*)';
 TITLE "FIGURE 9. Stock market participation over time"; 
WEIGHT PESO;                                                                
RUN;



** FIGURE 10;
PROC TABULATE DATA = all2 S=[cellwidth=150];
title "FIGURE 10. rate of returns by financial literacy quartile "; 
CLASS CLFL2 anno ;
VAR YCF_L/S=[ cellwidth=150 textindent=0];                                
TABLE anno*CLFL2,   
 YCF_L*MEAN="(mean)"*F=COMMA8.2                               
/ NOCELLMERGE rts=19 BOX='finlit quartile  (*)';
WEIGHT PESO;  
where anno GE 2004;
RUN;


TITLE "  ";